{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pandas Data Types\n",
    "Accompanying the PB Python article [here](http://pbpython.com/pandas_dtypes.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"https://github.com/chris1610/pbpython/blob/master/data/sales_data_types.csv?raw=True\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002.0</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>$125,000.00</td>\n",
       "      <td>$162500.00</td>\n",
       "      <td>30.00%</td>\n",
       "      <td>500</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278.0</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>$920,000.00</td>\n",
       "      <td>$101,2000.00</td>\n",
       "      <td>10.00%</td>\n",
       "      <td>700</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477.0</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>$50,000.00</td>\n",
       "      <td>$62500.00</td>\n",
       "      <td>25.00%</td>\n",
       "      <td>125</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900.0</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>$350,000.00</td>\n",
       "      <td>$490000.00</td>\n",
       "      <td>4.00%</td>\n",
       "      <td>75</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029.0</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>$15,000.00</td>\n",
       "      <td>$12750.00</td>\n",
       "      <td>-15.00%</td>\n",
       "      <td>Closed</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name         2016          2017  \\\n",
       "0          10002.0  Quest Industries  $125,000.00    $162500.00   \n",
       "1         552278.0    Smith Plumbing  $920,000.00  $101,2000.00   \n",
       "2          23477.0   ACME Industrial   $50,000.00     $62500.00   \n",
       "3          24900.0        Brekke LTD  $350,000.00    $490000.00   \n",
       "4         651029.0         Harbor Co   $15,000.00     $12750.00   \n",
       "\n",
       "  Percent Growth Jan Units  Month  Day  Year Active  \n",
       "0         30.00%       500      1   10  2015      Y  \n",
       "1         10.00%       700      6   15  2014      Y  \n",
       "2         25.00%       125      3   29  2016      Y  \n",
       "3          4.00%        75     10   27  2015      Y  \n",
       "4        -15.00%    Closed      2    2  2014      N  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use df.info and df.dtypes to look at the types that pandas automatically infers based on the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5 entries, 0 to 4\n",
      "Data columns (total 10 columns):\n",
      "Customer Number    5 non-null float64\n",
      "Customer Name      5 non-null object\n",
      "2016               5 non-null object\n",
      "2017               5 non-null object\n",
      "Percent Growth     5 non-null object\n",
      "Jan Units          5 non-null object\n",
      "Month              5 non-null int64\n",
      "Day                5 non-null int64\n",
      "Year               5 non-null int64\n",
      "Active             5 non-null object\n",
      "dtypes: float64(1), int64(3), object(6)\n",
      "memory usage: 480.0+ bytes\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since the 2016 and 2017 columns were read in as objects, trying to add the values will result in string concatenation not numerical addition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      $125,000.00$162500.00\n",
       "1    $920,000.00$101,2000.00\n",
       "2        $50,000.00$62500.00\n",
       "3      $350,000.00$490000.00\n",
       "4        $15,000.00$12750.00\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2016'] + df['2017']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The simplest way to to convert to a type is using astype.\n",
    "\n",
    "We can apply it to the customer number first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     10002\n",
       "1    552278\n",
       "2     23477\n",
       "3     24900\n",
       "4    651029\n",
       "Name: Customer Number, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Customer Number'].astype('int')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The code above does not alter the original dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number    float64\n",
       "Customer Name       object\n",
       "2016                object\n",
       "2017                object\n",
       "Percent Growth      object\n",
       "Jan Units           object\n",
       "Month                int64\n",
       "Day                  int64\n",
       "Year                 int64\n",
       "Active              object\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Assign the new integer customer number back to the original frame and check the type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number     int64\n",
       "Customer Name      object\n",
       "2016               object\n",
       "2017               object\n",
       "Percent Growth     object\n",
       "Jan Units          object\n",
       "Month               int64\n",
       "Day                 int64\n",
       "Year                int64\n",
       "Active             object\n",
       "dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"Customer Number\"] = df['Customer Number'].astype('int')\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>$125,000.00</td>\n",
       "      <td>$162500.00</td>\n",
       "      <td>30.00%</td>\n",
       "      <td>500</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>$920,000.00</td>\n",
       "      <td>$101,2000.00</td>\n",
       "      <td>10.00%</td>\n",
       "      <td>700</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>$50,000.00</td>\n",
       "      <td>$62500.00</td>\n",
       "      <td>25.00%</td>\n",
       "      <td>125</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>$350,000.00</td>\n",
       "      <td>$490000.00</td>\n",
       "      <td>4.00%</td>\n",
       "      <td>75</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>$15,000.00</td>\n",
       "      <td>$12750.00</td>\n",
       "      <td>-15.00%</td>\n",
       "      <td>Closed</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name         2016          2017  \\\n",
       "0            10002  Quest Industries  $125,000.00    $162500.00   \n",
       "1           552278    Smith Plumbing  $920,000.00  $101,2000.00   \n",
       "2            23477   ACME Industrial   $50,000.00     $62500.00   \n",
       "3            24900        Brekke LTD  $350,000.00    $490000.00   \n",
       "4           651029         Harbor Co   $15,000.00     $12750.00   \n",
       "\n",
       "  Percent Growth Jan Units  Month  Day  Year Active  \n",
       "0         30.00%       500      1   10  2015      Y  \n",
       "1         10.00%       700      6   15  2014      Y  \n",
       "2         25.00%       125      3   29  2016      Y  \n",
       "3          4.00%        75     10   27  2015      Y  \n",
       "4        -15.00%    Closed      2    2  2014      N  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The data all looks good for the Customer Number.\n",
    "\n",
    "If we try to convert the Jan Units column, we will get an error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "invalid literal for int() with base 10: 'Closed'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-10-31333711e4a4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Jan Units'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'int'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m                     \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 118\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m   4002\u001b[0m         \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4003\u001b[0m         new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[0;32m-> 4004\u001b[0;31m                                      **kwargs)\n\u001b[0m\u001b[1;32m   4005\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4006\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[1;32m   3460\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3461\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3462\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'astype'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3463\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3464\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[1;32m   3327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3328\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'mgr'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3329\u001b[0;31m             \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3330\u001b[0m             \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3331\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m    542\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'raise'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    543\u001b[0m         return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[0;32m--> 544\u001b[0;31m                             **kwargs)\n\u001b[0m\u001b[1;32m    545\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m     def _astype(self, dtype, copy=False, errors='raise', values=None,\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36m_astype\u001b[0;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[1;32m    623\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    624\u001b[0m                 \u001b[0;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m                 \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    626\u001b[0m                 \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    627\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy)\u001b[0m\n\u001b[1;32m    690\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobject_\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missubdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minteger\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    691\u001b[0m         \u001b[0;31m# work around NumPy brokenness, #1987\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 692\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype_intsafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    693\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    694\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"datetime64\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"timedelta64\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.astype_intsafe\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/src/util.pxd\u001b[0m in \u001b[0;36mutil.set_value_at_unsafe\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'Closed'"
     ]
    }
   ],
   "source": [
    "df['Jan Units'].astype('int')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In a similar manner we get an error if we try to convert the sales column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: '$15,000.00'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-999869d577b0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'2016'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'float'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m                     \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 118\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m   4002\u001b[0m         \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4003\u001b[0m         new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[0;32m-> 4004\u001b[0;31m                                      **kwargs)\n\u001b[0m\u001b[1;32m   4005\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4006\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[1;32m   3460\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3461\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3462\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'astype'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3463\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3464\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[1;32m   3327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3328\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'mgr'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3329\u001b[0;31m             \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3330\u001b[0m             \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3331\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m    542\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'raise'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    543\u001b[0m         return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[0;32m--> 544\u001b[0;31m                             **kwargs)\n\u001b[0m\u001b[1;32m    545\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    546\u001b[0m     def _astype(self, dtype, copy=False, errors='raise', values=None,\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36m_astype\u001b[0;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[1;32m    623\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    624\u001b[0m                 \u001b[0;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m                 \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    626\u001b[0m                 \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    627\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pbp3/lib/python3.6/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy)\u001b[0m\n\u001b[1;32m    701\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    702\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 703\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    704\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    705\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '$15,000.00'"
     ]
    }
   ],
   "source": [
    "df['2016'].astype('float')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can try to use astype with a bool type but that does not give expected results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    True\n",
       "1    True\n",
       "2    True\n",
       "3    True\n",
       "4    True\n",
       "Name: Active, dtype: bool"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Active'].astype('bool')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number     int64\n",
       "Customer Name      object\n",
       "2016               object\n",
       "2017               object\n",
       "Percent Growth     object\n",
       "Jan Units          object\n",
       "Month               int64\n",
       "Day                 int64\n",
       "Year                int64\n",
       "Active             object\n",
       "dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# astype can take a dictionary of column names and data types\n",
    "df.astype({'Customer Number': 'int', 'Customer Name': 'str'}).dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In order to convert the currency and percentages, we need to use custom functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_currency(val):\n",
    "    \"\"\"\n",
    "    Convert the string number value to a float\n",
    "     - Remove $\n",
    "     - Remove commas\n",
    "     - Convert to float type\n",
    "    \"\"\"\n",
    "    new_val = val.replace(',','').replace('$', '')\n",
    "    return float(new_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_percent(val):\n",
    "    \"\"\"\n",
    "    Convert the percentage string to an actual floating point percent\n",
    "    \"\"\"\n",
    "    new_val = val.replace('%', '')\n",
    "    return float(new_val) / 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use apply to convert the 2016 and 2017 columns to floating point numbers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    125000.0\n",
       "1    920000.0\n",
       "2     50000.0\n",
       "3    350000.0\n",
       "4     15000.0\n",
       "Name: 2016, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2016'].apply(convert_currency)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     162500.0\n",
       "1    1012000.0\n",
       "2      62500.0\n",
       "3     490000.0\n",
       "4      12750.0\n",
       "Name: 2017, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2017'].apply(convert_currency)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We could use a lambda function as well but it may be more difficult for new users to understand"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    125000.0\n",
       "1    920000.0\n",
       "2     50000.0\n",
       "3    350000.0\n",
       "4     15000.0\n",
       "Name: 2016, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['2016'].apply(lambda x: x.replace('$', '').replace(',', '')).astype('float')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assign the converted values back to the columns\n",
    "df['2016'] = df['2016'].apply(convert_currency)\n",
    "df['2017'] = df['2017'].apply(convert_currency)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use a lambda function to convert the percentage strings to numbers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.30\n",
       "1    0.10\n",
       "2    0.25\n",
       "3    0.04\n",
       "4   -0.15\n",
       "Name: Percent Growth, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Percent Growth'].apply(lambda x: x.replace('%', '')).astype('float') / 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Percent Growth'] = df['Percent Growth'].apply(convert_percent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number      int64\n",
       "Customer Name       object\n",
       "2016               float64\n",
       "2017               float64\n",
       "Percent Growth     float64\n",
       "Jan Units           object\n",
       "Month                int64\n",
       "Day                  int64\n",
       "Year                 int64\n",
       "Active              object\n",
       "dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>125000.0</td>\n",
       "      <td>162500.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>500</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>920000.0</td>\n",
       "      <td>1012000.0</td>\n",
       "      <td>0.10</td>\n",
       "      <td>700</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>125</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>350000.0</td>\n",
       "      <td>490000.0</td>\n",
       "      <td>0.04</td>\n",
       "      <td>75</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>12750.0</td>\n",
       "      <td>-0.15</td>\n",
       "      <td>Closed</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name      2016       2017  Percent Growth  \\\n",
       "0            10002  Quest Industries  125000.0   162500.0            0.30   \n",
       "1           552278    Smith Plumbing  920000.0  1012000.0            0.10   \n",
       "2            23477   ACME Industrial   50000.0    62500.0            0.25   \n",
       "3            24900        Brekke LTD  350000.0   490000.0            0.04   \n",
       "4           651029         Harbor Co   15000.0    12750.0           -0.15   \n",
       "\n",
       "  Jan Units  Month  Day  Year Active  \n",
       "0       500      1   10  2015      Y  \n",
       "1       700      6   15  2014      Y  \n",
       "2       125      3   29  2016      Y  \n",
       "3        75     10   27  2015      Y  \n",
       "4    Closed      2    2  2014      N  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's look at the data so far\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "pd.to_numeric is another option for handling column conversions when invalid values are included"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    500.0\n",
       "1    700.0\n",
       "2    125.0\n",
       "3     75.0\n",
       "4      NaN\n",
       "Name: Jan Units, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_numeric(df['Jan Units'], errors='coerce')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    500.0\n",
       "1    700.0\n",
       "2    125.0\n",
       "3     75.0\n",
       "4      0.0\n",
       "Name: Jan Units, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill in the NaN with 0\n",
    "pd.to_numeric(df['Jan Units'], errors='coerce').fillna(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure to populate the original column of data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Jan Units\"] = pd.to_numeric(df['Jan Units'], errors='coerce').fillna(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "pd.to_datetime is very useful for working with date conversions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   2015-01-10\n",
       "1   2014-06-15\n",
       "2   2016-03-29\n",
       "3   2015-10-27\n",
       "4   2014-02-02\n",
       "dtype: datetime64[ns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_datetime(df[['Month', 'Day', 'Year']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Start_Date\"] = pd.to_datetime(df[['Month', 'Day', 'Year']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "      <th>Start_Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>125000.0</td>\n",
       "      <td>162500.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "      <td>2015-01-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>920000.0</td>\n",
       "      <td>1012000.0</td>\n",
       "      <td>0.10</td>\n",
       "      <td>700.0</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>Y</td>\n",
       "      <td>2014-06-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>125.0</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>Y</td>\n",
       "      <td>2016-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>350000.0</td>\n",
       "      <td>490000.0</td>\n",
       "      <td>0.04</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>Y</td>\n",
       "      <td>2015-10-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>12750.0</td>\n",
       "      <td>-0.15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>N</td>\n",
       "      <td>2014-02-02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name      2016       2017  Percent Growth  \\\n",
       "0            10002  Quest Industries  125000.0   162500.0            0.30   \n",
       "1           552278    Smith Plumbing  920000.0  1012000.0            0.10   \n",
       "2            23477   ACME Industrial   50000.0    62500.0            0.25   \n",
       "3            24900        Brekke LTD  350000.0   490000.0            0.04   \n",
       "4           651029         Harbor Co   15000.0    12750.0           -0.15   \n",
       "\n",
       "   Jan Units  Month  Day  Year Active Start_Date  \n",
       "0      500.0      1   10  2015      Y 2015-01-10  \n",
       "1      700.0      6   15  2014      Y 2014-06-15  \n",
       "2      125.0      3   29  2016      Y 2016-03-29  \n",
       "3       75.0     10   27  2015      Y 2015-10-27  \n",
       "4        0.0      2    2  2014      N 2014-02-02  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check out the dataframe\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use np.where to convert the active column to a boolean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Active\"] = np.where(df[\"Active\"] == \"Y\", True, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "      <th>Start_Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>125000.0</td>\n",
       "      <td>162500.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "      <td>2015-01-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>920000.0</td>\n",
       "      <td>1012000.0</td>\n",
       "      <td>0.10</td>\n",
       "      <td>700.0</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>True</td>\n",
       "      <td>2014-06-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>125.0</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>True</td>\n",
       "      <td>2016-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>350000.0</td>\n",
       "      <td>490000.0</td>\n",
       "      <td>0.04</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "      <td>2015-10-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>12750.0</td>\n",
       "      <td>-0.15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>False</td>\n",
       "      <td>2014-02-02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name      2016       2017  Percent Growth  \\\n",
       "0            10002  Quest Industries  125000.0   162500.0            0.30   \n",
       "1           552278    Smith Plumbing  920000.0  1012000.0            0.10   \n",
       "2            23477   ACME Industrial   50000.0    62500.0            0.25   \n",
       "3            24900        Brekke LTD  350000.0   490000.0            0.04   \n",
       "4           651029         Harbor Co   15000.0    12750.0           -0.15   \n",
       "\n",
       "   Jan Units  Month  Day  Year  Active Start_Date  \n",
       "0      500.0      1   10  2015    True 2015-01-10  \n",
       "1      700.0      6   15  2014    True 2014-06-15  \n",
       "2      125.0      3   29  2016    True 2016-03-29  \n",
       "3       75.0     10   27  2015    True 2015-10-27  \n",
       "4        0.0      2    2  2014   False 2014-02-02  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number             int64\n",
       "Customer Name              object\n",
       "2016                      float64\n",
       "2017                      float64\n",
       "Percent Growth            float64\n",
       "Jan Units                 float64\n",
       "Month                       int64\n",
       "Day                         int64\n",
       "Year                        int64\n",
       "Active                       bool\n",
       "Start_Date         datetime64[ns]\n",
       "dtype: object"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Many of the examples shown above can be used when reading in data using dtypes or converters arguments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2 = pd.read_csv(\"https://github.com/chris1610/pbpython/blob/master/data/sales_data_types.csv?raw=True\", \n",
    "                   dtype={'Customer Number':'int'},\n",
    "                   converters={'2016':convert_currency,\n",
    "                               '2017': convert_currency,\n",
    "                               'Percent Growth': convert_percent,\n",
    "                               'Jan Units': lambda x: pd.to_numeric(x, errors='coerce'),\n",
    "                               'Active': lambda x: np.where(x == \"Y\", True, False)\n",
    "                              })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Customer Number      int64\n",
       "Customer Name       object\n",
       "2016               float64\n",
       "2017               float64\n",
       "Percent Growth     float64\n",
       "Jan Units          float64\n",
       "Month                int64\n",
       "Day                  int64\n",
       "Year                 int64\n",
       "Active              object\n",
       "dtype: object"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>125000.0</td>\n",
       "      <td>162500.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>920000.0</td>\n",
       "      <td>1012000.0</td>\n",
       "      <td>0.10</td>\n",
       "      <td>700.0</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>125.0</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>350000.0</td>\n",
       "      <td>490000.0</td>\n",
       "      <td>0.04</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>12750.0</td>\n",
       "      <td>-0.15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name      2016       2017  Percent Growth  \\\n",
       "0            10002  Quest Industries  125000.0   162500.0            0.30   \n",
       "1           552278    Smith Plumbing  920000.0  1012000.0            0.10   \n",
       "2            23477   ACME Industrial   50000.0    62500.0            0.25   \n",
       "3            24900        Brekke LTD  350000.0   490000.0            0.04   \n",
       "4           651029         Harbor Co   15000.0    12750.0           -0.15   \n",
       "\n",
       "   Jan Units  Month  Day  Year Active  \n",
       "0      500.0      1   10  2015   True  \n",
       "1      700.0      6   15  2014   True  \n",
       "2      125.0      3   29  2016   True  \n",
       "3       75.0     10   27  2015   True  \n",
       "4        NaN      2    2  2014  False  "
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This can not be applied at the time the data is read in\n",
    "df_2[\"Start_Date\"] = pd.to_datetime(df_2[['Month', 'Day', 'Year']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Number</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>Percent Growth</th>\n",
       "      <th>Jan Units</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>Year</th>\n",
       "      <th>Active</th>\n",
       "      <th>Start_Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10002</td>\n",
       "      <td>Quest Industries</td>\n",
       "      <td>125000.0</td>\n",
       "      <td>162500.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>500.0</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "      <td>2015-01-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>552278</td>\n",
       "      <td>Smith Plumbing</td>\n",
       "      <td>920000.0</td>\n",
       "      <td>1012000.0</td>\n",
       "      <td>0.10</td>\n",
       "      <td>700.0</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>2014</td>\n",
       "      <td>True</td>\n",
       "      <td>2014-06-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23477</td>\n",
       "      <td>ACME Industrial</td>\n",
       "      <td>50000.0</td>\n",
       "      <td>62500.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>125.0</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>2016</td>\n",
       "      <td>True</td>\n",
       "      <td>2016-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>24900</td>\n",
       "      <td>Brekke LTD</td>\n",
       "      <td>350000.0</td>\n",
       "      <td>490000.0</td>\n",
       "      <td>0.04</td>\n",
       "      <td>75.0</td>\n",
       "      <td>10</td>\n",
       "      <td>27</td>\n",
       "      <td>2015</td>\n",
       "      <td>True</td>\n",
       "      <td>2015-10-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>651029</td>\n",
       "      <td>Harbor Co</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>12750.0</td>\n",
       "      <td>-0.15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>False</td>\n",
       "      <td>2014-02-02</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Customer Number     Customer Name      2016       2017  Percent Growth  \\\n",
       "0            10002  Quest Industries  125000.0   162500.0            0.30   \n",
       "1           552278    Smith Plumbing  920000.0  1012000.0            0.10   \n",
       "2            23477   ACME Industrial   50000.0    62500.0            0.25   \n",
       "3            24900        Brekke LTD  350000.0   490000.0            0.04   \n",
       "4           651029         Harbor Co   15000.0    12750.0           -0.15   \n",
       "\n",
       "   Jan Units  Month  Day  Year Active Start_Date  \n",
       "0      500.0      1   10  2015   True 2015-01-10  \n",
       "1      700.0      6   15  2014   True 2014-06-15  \n",
       "2      125.0      3   29  2016   True 2016-03-29  \n",
       "3       75.0     10   27  2015   True 2015-10-27  \n",
       "4        NaN      2    2  2014  False 2014-02-02  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
